##############################
# Install packages, if needed.
# Load needed packages
# Define custom needed functions
##############################
RequiredPackages <- c(
## Viz
"gghalves",
"ggplot2",
"patchwork",
"showtext",
"ggrepel",
"scales",
"plotly",
"UpSetR",
"Cairo",
## Data Manipulation
"magrittr",
"data.table",
"tidyverse",
"tidyr",
"dplyr",
"janitor",
## Stats
"poweRlaw",
## NetSci
"NetSci",
"igraph",
"CoDiNA",
## Others
"progress",
"stringr",
"knitr",
"parallel"
)
for (i in RequiredPackages) {
if (!require(i, character.only = TRUE)) install.packages(i)
}
`%ni%` <- Negate(`%in%`)
##############################
# Define Colors
##############################
values = c("Protein Coding" = "#ef476f",
"TF" = "#f78c6b" ,
"Pseudogene" = "#06d6a0",
"miRNA" = "#83d483",
"other" = "#ffd166",
"lncRNA" = "#0cb0a9",
"ncRNA" = "#118ab2")
PPI_colors = c(PPI = "#8D3B72",
`PPI & NCI` = "#72E1D1")
##############################
# Define Functions
##############################
##############################
# Order Alfabetically
##############################
OrderNames <- function (M) {
M[,1] %<>% as.character()
M[,2] %<>% as.character()
n1 = apply(M[, 1:2], 1, min)
n2 = apply(M[, 1:2], 1, max)
M[,1] <- n1
M[,2] <- n2
return(M)
}
##############################
# Plot Disease Modules
##############################
plot_diseases = function(disease){
disease_genes = GDA %>%
filter(DiseaseName == disease) %>%
pull(hgnc_symbol) %>%
unique()
V(gPPI)$frame.color = NA;
V(gPPInc)$frame.color = NA
g_disease_PPI = gPPI %>%
induced_subgraph(vids =
disease_genes[disease_genes %in%
V(.)$name])
g_disease_PPINC = gPPInc %>%
induced_subgraph(vids =
disease_genes[disease_genes %in%
V(.)$name])
in_lcc_ncippi = g_disease_PPINC %>%
extract_LCC() %>%
V() %>%
unclass() %>%
names
in_lcc_ppi = g_disease_PPI %>%
extract_LCC() %>%
V() %>%
unclass() %>%
names
V(g_disease_PPI)$degree = igraph::degree(g_disease_PPI)
V(g_disease_PPINC)$degree = igraph::degree(g_disease_PPINC)
V(g_disease_PPI)$size =
igraph::degree(g_disease_PPI) %>%
CoDiNA::normalize() + 0.01
V(g_disease_PPINC)$size =
igraph::degree(g_disease_PPINC) %>%
CoDiNA::normalize() + 0.01
V(g_disease_PPI)$size %<>% ifelse(is.na(.), 0.5,.)
E(g_disease_PPI)$width = E(g_disease_PPI)$weight =
edge.betweenness(g_disease_PPI, directed = F) %>%
CoDiNA::normalize()
E(g_disease_PPINC)$width = E(g_disease_PPINC)$weight =
edge.betweenness(g_disease_PPINC, directed = F) %>%
CoDiNA::normalize()
E(g_disease_PPINC)$weight = (E(g_disease_PPINC)$weight) + 0.01
E(g_disease_PPI)$weight = E(g_disease_PPI)$weight+ 0.01
#
# fix colors
#
V(g_disease_PPI)$color =
ifelse(V(g_disease_PPI)$name %in% in_lcc_ppi,
V(g_disease_PPI)$color, "gray75")
V(g_disease_PPI)$frame.color =
V(g_disease_PPI)$color
###
###
V(g_disease_PPINC)$color =
ifelse(V(g_disease_PPINC)$name %in% in_lcc_ncippi,
V(g_disease_PPINC)$color, "gray75")
V(g_disease_PPINC)$frame.color =
V(g_disease_PPINC)$color
V(g_disease_PPI)$label =
ifelse(V(g_disease_PPI)$size >
quantile(V(g_disease_PPI)$size, 0.99),
V(g_disease_PPI)$name, NA)
V(g_disease_PPINC)$label =
ifelse(V(g_disease_PPINC)$size >
quantile(V(g_disease_PPINC)$size, 0.99),
V(g_disease_PPINC)$name, NA)
return(list(PPI = g_disease_PPI,
NCIPPI = g_disease_PPINC))
}
##############################
# Select Few Diseases as Reference
##############################
Interest = c("schizophrenia",
"crohn disease",
"arthritis rheumatoid",
"pre eclampsia",
"asthma",
"angina pectoris")
##############################
# Define Parameters to
# be used in the analysis
##############################
sabp = 0.05 # S_ab pvalue
lccp = 0.05 # LCC pvalue
comop = 0.01 # RR pvalue
overlapp = 0.01 # Hypergeometric-test p-valueNon-Coding RNAs Improve the Predictive Power of Network Medicine
Datasets
Protein-Protein Interactome (PPI)
The human Protein-Protein Interactome was derived from 21 public databases containing different types of experimentally-derived PPI data, as described in the manuscript.
Protein-Protein & Non-Coding Interactome (PPI & NCI)
The PPI & NCI is combined from nine publicly available databases that collect and curate experimentally-derived non-coding interactions: 1) miRNA-targets, derived from reporter assay, western blot, microarray, and next-generation sequencing experiments from mirTARbase; 2) miRNAs and lncRNAs interactions validated using CLIP-seq, AGO CLIP-seq, ChIRP-seq, HITS-CLIP and PAR-CLIP from lncBook, NPinter4, DIANA Tools, RAIN, and lncRNome; 3) RNA-RNA interactions validated from transcriptome-wide sequencing-based experiments (PARIS, SPLASH, LIGRseq, and MARIO), and targeted studies (RIAseq, RAP-RNA, and CLASH) from RISE; 4) Literature curated from miRNet and miRecords.
Gene Disease Association (GDA)
Data was combined from multiple sources, as described in the manuscript, such as GWAS from ClinGen, ClinVar, CTD, Disease Enhancer, DisGeNET, GWAS Catalog, HMDD45, lncBook, LncRNA disease, LOVD, Monarch, OMIM, Orphanet, PheGenI, and PsyGeNet.
Code For Reproducing Paper Results
##############################
# Load the Data
##############################
PPI = fread("../data/input/PPI.csv") ## PPI
glimpse(PPI)Rows: 536,965
Columns: 3
$ HGNC_Symbol.1 <chr> "A1BG", "A1BG", "A1BG", "A1BG", "A1BG", "A1BG", "A1BG", …
$ HGNC_Symbol.2 <chr> "ABCC6", "ANXA7", "CDKN1A", "CRISP3", "FDXR", "GDPD1", "…
$ Source <chr> "HINT|PINA|APID|BioGrid|Instruct", "HINT|HIPPIE|PINA|API…
##############################
# PPI include:
# $ HGNC_Symbol.1: HGNC Gene Name for Node 1
# $ HGNC_Symbol.2: HGNC Gene Name for Node 2
# $ Source : Source of the interaction, "|" separated
##############################
Annot_Dic = fread("../data/input/gene_dictionary.txt") ## Gene Dictionary
glimpse(Annot_Dic)Rows: 43,413
Columns: 2
$ Symbol <chr> "A1BG", "A1BG-AS1", "A1CF", "A2M", "A2M-AS1", "A2ML1", "A2…
$ Category_cl <chr> "Protein Coding", "lncRNA", "Protein Coding", "Protein Cod…
##############################
# Annot_Dic include:
# $ Symbol : HGNC Gene Name
# $ Category_cl : HGNC Gene Category
# $ color : Color Definition for Viz
##############################
##
## Define the nodes colors based on gene category
##
Annot_Dic$Category_cl = factor(Annot_Dic$Category_cl,
levels = names(values))
Annot_Dic$color = values[unclass(Annot_Dic$Category_cl)]
NCPPI = fread("../data/input/ncPPI_PPI.csv") ## PPI & NCI
glimpse(NCPPI)Rows: 1,114,777
Columns: 3
$ HGNC_Symbol.1 <chr> "A1BG", "A1BG", "A1BG", "A1BG", "A1BG", "A1BG", "A1BG", …
$ HGNC_Symbol.2 <chr> "ABCC6", "ANXA7", "CDKN1A", "CRISP3", "FDXR", "GDPD1", "…
$ Source <chr> "HINT|PINA|APID|BioGrid|Instruct", "HINT|HIPPIE|PINA|API…
##############################
# NCPPI include:
# $ HGNC_Symbol.1: HGNC Gene Name for Node 1
# $ HGNC_Symbol.2: HGNC Gene Name for Node 2
# $ Source : Source of the interaction, "|" separated
##############################
GDA = fread("../data/input/GDA.csv") ## Gene Disease Association
glimpse(GDA)Rows: 62,665
Columns: 6
$ DiseaseName <chr> "abortion habitual", "abortion habitual", "abortion hab…
$ hgnc_symbol <chr> "CREB5", "CTDSPL", "HOTAIR", "MIR100", "MIR106B", "MIR1…
$ evidences <chr> "CTD|DisGeNET_GDA", "OMIM", "LncRNAdisease", "HMDD", "H…
$ Total_Genes <int> 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,…
$ DescriptorName <chr> "Female Urogenital Diseases and Pregnancy Complications…
$ Category_cl <chr> "TF", "TF", "lncRNA", "miRNA", "miRNA", "miRNA", "miRNA…
##############################
# GDA include:
# DiseaseName : Disease Name in MeSH Terms
# MESHID : MESH ID of the disease
# hgnc_symbol : HGNC Gene Name
# evidence : Sources of association
# DescriptorName : MeSH categories for the disease
# Total_Genes : Total of Associated Genes
# Category_cl : HGNC Gene Category
############################################################
### Create the graphs:
### Edge list from the csv files (PPI, PPI & NCI)
### Node list from gene annotation
### Because the annotation may include genes that
### do not exist in the graph, we delete all nodes with
### degree = 0
##############################
### gPPI as the PPI graph
### Basic Network Analysis and
### Network caracterization
gPPI = graph_from_data_frame(PPI, directed = F,
vertices = Annot_Dic) %>%
delete.vertices(degree(.) == 0)
V(gPPI)$degree = degree(gPPI)
gPPIIGRAPH 0213925 UN-- 18217 536965 --
+ attr: name (v/c), Category_cl (v/c), color (v/c), degree (v/n),
| Source (e/c)
+ edges from 0213925 (vertex names):
[1] A1BG--ABCC6 A1BG--ANXA7 A1BG--CDKN1A A1BG--CRISP3 A1BG--FDXR
[6] A1BG--GDPD1 A1BG--GRB7 A1BG--HSP90AA1 A1BG--KCMF1 A1BG--KLK10
[11] A1BG--LGALS3 A1BG--MATN2 A1BG--MCM10 A1BG--NRF1 A1BG--PHF11
[16] A1BG--PRDX4 A1BG--PSME4 A1BG--RHBDD1 A1BG--RNF123 A1BG--SETD7
[21] A1BG--SMN1 A1BG--SMN2 A1BG--SNCA A1BG--TK1 A1BG--UBAC1
[26] A1BG--UBE2U A1BG--UBR4 A1BG--UBXN1 A1BG--WDR62 A1BG--ZBTB40
[31] A1CF--APOBEC1 A1CF--CAVIN2 A1CF--CELF2 A1CF--CYP46A1 A1CF--CYSRT1
+ ... omitted several edges
net_caracs = list()
net_caracs$PPI = data.frame(network = "PPI",
diameter = diameter(gPPI),
density = edge_density(gPPI, loops=TRUE),
components = components(gPPI)$no,
transitivity = igraph::transitivity(gPPI),
largest_cliques = clique_num(gPPI))
x = igraph::clusters(gPPI)
net_caracs$PPI$LCC = max(x$csize)
dist = distances(NetSci::extract_LCC(gPPI))
dist[upper.tri(dist, diag = T)] <- NA
net_caracs$PPI$av_sp = mean(dist, na.rm = T)
### gPPInc as the PPI & NCI graph
gPPInc = graph_from_data_frame(NCPPI, directed = F,
vertices = Annot_Dic) %>%
delete.vertices(degree(.) == 0)
V(gPPInc)$degree = degree(gPPInc)
gPPIncIGRAPH 4c0d4b5 UN-- 26575 1114777 --
+ attr: name (v/c), Category_cl (v/c), color (v/c), degree (v/n),
| Source (e/c)
+ edges from 4c0d4b5 (vertex names):
[1] A1BG--ABCC6 A1BG--ANXA7 A1BG--CDKN1A A1BG--CRISP3 A1BG--FDXR
[6] A1BG--GDPD1 A1BG--GRB7 A1BG--HSP90AA1 A1BG--KCMF1 A1BG--KLK10
[11] A1BG--LGALS3 A1BG--MATN2 A1BG--MCM10 A1BG--MIR1262 A1BG--MIR1288
[16] A1BG--MIR141 A1BG--MIR200A A1BG--MIR4645 A1BG--MIR4701 A1BG--MIR4772
[21] A1BG--MIR4793 A1BG--MIR508 A1BG--MIR542 A1BG--MIR5582 A1BG--MIR6512
[26] A1BG--MIR661 A1BG--MIR6720 A1BG--MIR6736 A1BG--MIR6742 A1BG--MIR6776
[31] A1BG--MIR6849 A1BG--MIR766 A1BG--MIR7703 A1BG--NRF1 A1BG--PHF11
+ ... omitted several edges
net_caracs$PPI_NCI = data.frame(network = "PPI & NCI",
diameter = diameter(gPPInc),
density = edge_density(gPPInc, loops=TRUE),
components = components(gPPInc)$no,
transitivity = igraph::transitivity(gPPInc),
largest_cliques = clique_num(gPPInc))
x = igraph::clusters(gPPInc)
net_caracs$PPI_NCI$LCC = max(x$csize)
dist = distances(NetSci::extract_LCC(gPPInc))
dist[upper.tri(dist, diag = T)] <- NA
net_caracs$PPI_NCI$av_sp = mean(dist, na.rm = T)
net_caracs %>%
bind_rows() %>%
pivot_longer(cols = - network) %>%
pivot_wider(., names_from = network) %>%
knitr::kable()| name | PPI | PPI & NCI |
|---|---|---|
| diameter | 7.000000e+00 | 9.000000e+00 |
| density | 3.235900e-03 | 3.156900e-03 |
| components | 1.300000e+01 | 5.200000e+01 |
| transitivity | 4.785070e-02 | 2.594430e-02 |
| largest_cliques | 5.700000e+01 | 5.700000e+01 |
| LCC | 1.820400e+04 | 2.648300e+04 |
| av_sp | 2.661309e+00 | 2.794652e+00 |
Table S1: Descriptive of Databases Genes and Reported Binding Experimental Interactions.
##############################
### Build a summary table (Table S1)
### Include the number of genes and interaction type
### in each database.
##############################
### Because the PPI is a subset of the
### PPI & NCI, we build the
### table only based on the PPI & NCI.
##############################
NCI = NCPPI
n_map = Annot_Dic %>%
select(Symbol,Category_cl)
NCI %<>%
dplyr::left_join(n_map, by = c("HGNC_Symbol.1"="Symbol")) %>%
rename(Category_A = Category_cl) %>%
dplyr::left_join(n_map, by = c("HGNC_Symbol.2"="Symbol")) %>%
rename(Category_B = Category_cl) %>%
unique()
NCI %<>%
mutate(id = 1:n())
NCI_PPI = NCI$Source %>%
stringr::str_split(., "\\|", simplify = TRUE) %>%
as.data.frame() %>%
mutate(id = 1:n())
NCI_PPI_l = list()
y = 223
i = nrow(NCI_PPI)/y
for (k in 1:y){
ini = (i*(k-1) + 1)
end = i*k
NCI_PPI_l[[k]] = NCI_PPI[ini:end, ]%>%
pivot_longer(-id, values_to = "source") %>%
select(-name) %>%
filter(source != "") %>%
unique() %>%
dplyr::inner_join(NCI, ., by = 'id') %>%
select(- c(Source, id )) %>%
unique()
}
NCI_PPI_l %<>%
bind_rows() %>%
unique()
pc = c("Protein Coding", "TF")
edges_summary = NCI_PPI_l %>%
mutate(type = ifelse(Category_A %in% pc &
Category_B %in% pc,
"PC <-> PC", "PC <-> NC")) %>%
mutate(type = ifelse(Category_A %ni% pc &
Category_B %ni% pc,
"NC <-> NC", type)) %>%
group_by(source, type) %>%
summarise(n = n()) %>%
pivot_wider(names_from = type,
values_from = n,
values_fill = 0)
nodes_summary =
NCI_PPI_l %>%
select(-starts_with("Category")) %>%
pivot_longer(cols = c(HGNC_Symbol.1, HGNC_Symbol.2)) %>%
select(-name) %>%
unique() %>%
dplyr::left_join(Annot_Dic,
by = c("value"="Symbol")) %>%
mutate(type =
ifelse(Category_cl %in% pc, "PC", "NC")) %>%
group_by(type, source) %>%
summarise(n = n()) %>%
pivot_wider(names_from = type, values_from = n)
summary_data = dplyr::inner_join(nodes_summary,
edges_summary)
summary_data %>%
knitr::kable()| source | NC | PC | NC <-> NC | PC <-> NC | PC <-> PC |
|---|---|---|---|---|---|
| APID | 411 | 15930 | 32 | 3151 | 169730 |
| BioGrid | 616 | 15308 | 31 | 9883 | 194221 |
| BioPlex | 386 | 13547 | 177 | 3912 | 104600 |
| CoFrac | 19 | 2959 | 3 | 205 | 13689 |
| DIANA | 2167 | 10087 | 15189 | 76060 | 0 |
| DIP | 2 | 1389 | 1 | 1 | 1285 |
| ENCODE | 1229 | 13808 | 1412 | 101569 | 23473 |
| HI-Union | 165 | 8882 | 17 | 1703 | 61969 |
| HINT | 320 | 14576 | 66 | 2528 | 116789 |
| HIPPIE | 100 | 8520 | 17 | 1437 | 121691 |
| InWeb | 67 | 10530 | 43 | 94 | 58507 |
| InateDB | 153 | 5548 | 1 | 251 | 13378 |
| Insider | 17 | 3362 | 18 | 12 | 3939 |
| Instruct | 206 | 11234 | 45 | 774 | 50456 |
| IntAct | 56 | 4567 | 10 | 131 | 9102 |
| Interactome3d | 68 | 7327 | 108 | 135 | 15071 |
| KinomeNetworkX | 7 | 2340 | 0 | 25 | 7367 |
| LNC_Book | 30 | NA | 21 | 0 | 0 |
| LitBM17 | 20 | 6020 | 1 | 46 | 13320 |
| MINT | 42 | 4871 | 6 | 121 | 10616 |
| NPInterv4 | 485 | 538 | 210 | 860 | 83 |
| PINA | 282 | 14895 | 12 | 958 | 163068 |
| PhosphoSitePlus | 7 | 2840 | 0 | 7 | 7014 |
| QUBIC | 34 | 5271 | 7 | 241 | 27453 |
| RAIN | 2973 | 13271 | 7840 | 383213 | 156 |
| RISE | 5101 | 15559 | 2333 | 26660 | 35091 |
| Signalink | 152 | 11279 | 5 | 415 | 51982 |
| lncRNome | 391 | 130 | 0 | 733 | 245 |
| miRNet | 347 | 917 | 14 | 2532 | 0 |
| miRTarBase | 511 | 2511 | 51 | 6616 | 0 |
| miRecords | 167 | 651 | 7 | 967 | 1 |
Table S2: Degreee Median and interquartile ranges for each gene category in all two networks.
##############################
### Basic Stats of degree in both networks
##############################
n2 = gPPI %>% igraph::as_data_frame(., "vertices")
n3 = gPPInc %>% igraph::as_data_frame(., "vertices")
n2 %<>%
mutate(base = "PPI")
n3 %<>%
mutate(base = "PPI & NCI")
Nodes = bind_rows(n2, n3)
Nodes %>%
group_by(Category_cl, base) %>%
summarise(median = quantile(degree, 0.50),
Q1 = quantile(degree, 0.250),
Q3 = quantile(degree, 0.750)) %>%
mutate( result = paste0(median, " [", Q1, "; ", Q3, "]")) %>%
select(Category_cl, base, result) %>%
pivot_wider(names_from = base,
values_from = result) %>%
knitr::kable()`summarise()` has grouped output by 'Category_cl'. You can override using the
`.groups` argument.
| Category_cl | PPI | PPI & NCI |
|---|---|---|
| Protein Coding | 30 [12; 64] | 54 [20; 107] |
| Pseudogene | NA | 2 [1; 4] |
| TF | 48 [20; 102] | 90 [41; 168] |
| lncRNA | NA | 3 [1; 6] |
| miRNA | NA | 52 [16; 224.75] |
| other | NA | 11 [3; 25] |
Figure S1: Network Validation: Gene-Interaction Overlap
##############################
### Upset Plot of Gene-Interaction Overlap
##############################
ps1 = NCI_PPI_l %>%
mutate(type = ifelse(Category_A %in% pc &
Category_B %in% pc, "PC <-> PC", "PC <-> NC")) %>%
mutate(type = ifelse(Category_A %ni% pc &
Category_B %ni% pc, "NC <-> NC", type)) %>%
group_by(HGNC_Symbol.1, HGNC_Symbol.2, type, source) %>%
summarise(n = n()) %>%
pivot_wider(names_from = source, values_from = n, values_fill = 0) %>%
as.data.frame() %>%
UpSetR::upset(., order.by = 'freq',
sets.x.label = 'Interactions (links)',
mainbar.y.label = 'Overlap',
queries = list(list(query = elements,
params = list("type", "PC <-> PC"),
color = "#8D3B72", active = T)),
# sets.bar.color = "#72E1D1",
main.bar.color = "#72E1D1",
nsets = 100,
number.angles = 30,
mb.ratio = c(0.5, 0.5),
text.scale = 1.5)
Cairo::CairoPDF("../figs/Fig_S1.pdf", width = 10, height = 10)
ps1dev.off()quartz_off_screen
2
Figure S2: Network Validation: Gene Overlap
##############################
### Upset Plot of Gene Overlap
##############################
ps2 = NCI_PPI_l %>%
select(starts_with("HGNC"),source) %>%
pivot_longer(-source) %>%
select(-name) %>%
unique() %>%
dplyr::left_join(., n_map, by = c('value' = "Symbol")) %>%
mutate(type = ifelse(Category_cl %in% pc, "PC", "NC")) %>%
select(-Category_cl) %>%
group_by(value, type, source) %>%
summarise(n = n()) %>%
pivot_wider(names_from = source,
values_from = n,
values_fill = 0) %>%
as.data.frame() %>%
UpSetR::upset(., order.by = 'freq',
sets.x.label = 'Genes (nodes)',
mainbar.y.label = 'Overlap',
# sets.bar.color = "#56B4E9",
# main.bar.color = "#56B4E9",
queries = list(list(query = elements,
params = list("type", "PC"),
color = "#8D3B72", active = T)),
# sets.bar.color = "#72E1D1",
main.bar.color = "#72E1D1",
nsets = 100,
number.angles = 30,
mb.ratio = c(0.5, 0.5),
text.scale = 1.5)
Cairo::CairoPDF("../figs/Fig_S2.pdf", width = 10, height = 10)
ps2dev.off()quartz_off_screen
2
Figure S3: Degree distribution of different genomic elements.
##############################
### Degree Distribution
##############################
gPPI %>%
degree(.) %>%
data.frame(degree = ., Symbol = names(.)) %>%
left_join(Annot_Dic) %>%
group_by(degree, Category_cl) %>%
summarise(k = n()) %>%
ggplot() +
aes(x = degree,
y = k,
fill = Category_cl,
colour = Category_cl) +
geom_point() +
scale_fill_manual(values = values) +
scale_color_manual(values = values) +
scale_x_continuous(trans = "log10") +
scale_y_continuous(trans = "log10") +
labs(x = "Degree", y = "k",
color = "Gene Category",
fill = "Gene Category") +
theme_minimal() +
theme(legend.position = "bottom")gPPInc %>%
degree(.) %>%
data.frame(degree = ., Symbol = names(.)) %>%
left_join(Annot_Dic) %>%
group_by(degree, Category_cl) %>%
summarise(k = n()) %>%
ggplot() +
aes(x = degree,
y = k,
fill = Category_cl,
colour = Category_cl) +
geom_point() +
scale_fill_manual(values = values) +
scale_color_manual(values = values) +
scale_x_continuous(trans = "log10") +
scale_y_continuous(trans = "log10") +
labs(x = "Degree", y = "k",
color = "Gene Category",
fill = "Gene Category") +
theme_minimal() +
theme(legend.position = "bottom")##############################
### DegreeDistribution as bin-log-log
### Start by defining the bins
##############################
DD_loglog = Nodes
DD_loglog %<>%
mutate(logdegree = log10(DD_loglog$degree))
calc_breaks = function(min_bin, max_bin, bins = 10 ){
res = 10^(seq(min_bin, max_bin,length.out= bins))
return(res)
}
bins = DD_loglog %>%
ungroup() %>%
group_by(base, Category_cl) %>%
summarise(min_bin = min(logdegree),
max_bin = max(logdegree)) %>%
split(paste(.$base, .$Category_cl, sep = "_")) %>%
map(~ calc_breaks(.$min_bin, .$max_bin) ) %>%
bind_rows() %>%
mutate( id = 1:n()) %>%
pivot_longer(data = .,
cols = -id,
names_sep = "_",
names_to = c("base", "Category_cl"))
genetype = DD_loglog$Category_cl %>% unique()
bases = DD_loglog$base %>% unique()
res = list(); k = 0
for(i in 1:length(genetype)){
for(j in 1:length(bases)){
bs = bins %>%
filter(Category_cl == genetype[i]) %>%
filter(base == bases[j]) %>%
pull(value)
hi = DD_loglog %>%
filter(Category_cl == genetype[i]) %>%
filter(base == bases[j]) %>%
pull(degree)
if(length(hi)> 0){
k = k + 1
hi %<>%
hist(., breaks = bs, plot = F)
res[[k]] = data.frame(counts = hi$counts,
density = hi$density,
mids = hi$mids,
Category_cl = genetype[i],
base = bases[j])
}
}
}
res %<>% bind_rows()
res %<>%
group_by(Category_cl, base)%>%
mutate(tick = 1) %>%
mutate(tick = cumsum(tick))Fig_S3 = res %>%
filter(base %ni% "NCI") %>%
ggplot() +
aes(x = mids,
y = density,
colour = Category_cl) +
geom_point(shape = "circle", size = 1.5) +
geom_smooth(span = 0.75, se = F, linewidth = 0.5) +
scale_color_manual(values = values) +
scale_x_continuous(trans = "log10") +
scale_y_continuous(trans = "log10") +
theme_minimal() +
facet_wrap(vars(base)) +
labs (x = "k",
y = "p(k)",
color = "Element type") +
theme(legend.position = "bottom",
text = element_text(size = 18,
family = "HelveticaNeue"),
axis.title = element_text(face = "bold",
hjust = 0.5,
family = "HelveticaNeue"),
axis.title.x.top = element_text(face = "bold",
hjust = 0.5,
family = "HelveticaNeue"),
strip.text = element_text(face = "bold",
hjust = 0.5,
family = "HelveticaNeue"))
Cairo::CairoPDF("../figs/Fig_S3.pdf", width = 10, height = 10)
Fig_S3`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
dev.off()quartz_off_screen
2
Figure S4: Fit a Power-Law
## 00_PL_fit.RTable S6: Power Law Gamma
## 00_PL_fit.RFigure S4: Gene completion mapped in the PPI and the combined network.
##############################
### Understand the percentage of genes
### that are annotated and also can be found
### in the networks.
##############################
Classificate_Summary = gPPInc %>%
igraph::as_data_frame(., what = "vertices") %>%
mutate(Category_Summarised = ifelse(Category_cl %in%
c("TF", "Protein Coding"), "Protein Coding", as.character(Category_cl))) %>%
mutate(Category_Summarised = ifelse(Category_Summarised %in%
c("lncRNA", "miRNA", "ncRNA"), "Non Coding", as.character(Category_Summarised)))
Classificate_Summary %>%
group_by(Category_Summarised) %>%
summarise(n = n()) %>%
ungroup() %>%
mutate(`%` = round(n/sum(n) * 100, 2))# A tibble: 4 × 3
Category_Summarised n `%`
<chr> <int> <dbl>
1 Non Coding 3784 14.2
2 Protein Coding 18358 69.1
3 Pseudogene 3429 12.9
4 other 1004 3.78
dist_genesncppi = gPPInc %>%
igraph::as_data_frame(., "vertices") %>%
ungroup() %>%
select(Symbol = name) %>%
unique() %>%
mutate(NCIPPI = "Yes") %>%
dplyr::full_join(., Annot_Dic) %>%
mutate(NCIPPI = ifelse(is.na(NCIPPI), "No", NCIPPI)) %>%
group_by(Category_cl, NCIPPI) %>%
summarise(n = n()) %>%
arrange(n)
Table_dist_genesppi = dist_genesncppi %>%
pivot_wider(names_from = NCIPPI, values_from = n) %>%
mutate(`% retrieved` = Yes/(Yes + No) * 100) %>%
mutate(total = (Yes + No)) %>%
arrange(total)
Table_dist_genesppi$Category_cl %<>% factor(., levels = Table_dist_genesppi$Category_cl)
Table2 = Annot_Dic %>%
select(-color)%>%
mutate(`PPI` = ifelse(Symbol %in% V(gPPI)$name, "Yes", "No")) %>%
mutate(`NCI & PPI` = ifelse(Symbol %in% V(gPPInc)$name, "Yes", "No")) %>%
select(-Symbol) %>%
pivot_longer(-Category_cl) %>%
group_by(Category_cl, name, value) %>%
summarise(n = n()) %>%
arrange(n)
l = Table2 %>%
filter(name == "NCI & PPI") %>%
group_by(Category_cl) %>%
summarise(Total = sum(n)) %>%
arrange(Total)
Table2$Category_cl %<>% factor(., levels = l$Category_cl)
Table2 %<>% dplyr::inner_join(., l) %>%
mutate(pct = round(n/Total * 100,1)) %>%
mutate(pct_lab = ifelse(value == "No", NA, pct))
lev = Table2 %>%
ungroup %>%
select(Category_cl, Total) %>%
unique() %>%
arrange(Total)
Table2$Category_cl %<>% factor(., levels = lev$Category_cl)
Fig_s4 = Table2 %>%
mutate(name = factor(name,
levels = c("PPI","NCI & PPI"),
labels = c("PPI","PPI & NCI"))) %>%
mutate(pct_lab = ifelse(!is.na(pct_lab), paste(pct_lab, "%"), "")) %>%
ggplot() +
aes(x = Category_cl,
fill = value,
weight = n) +
geom_bar() +
geom_text(aes(label = pct_lab),
stat = "count",
hjust = 1,
colour = "#a4133c") +
scale_fill_manual(
values = c(No = "#ffccd5",
Yes = "#ff4d6d")
) +
coord_flip() +
theme_minimal() +
theme(legend.position = "bottom",
text = element_text(size = 18),
axis.title = element_text(face = "bold",
hjust = 0.5),
axis.title.x.top = element_text(face = "bold",
hjust = 0.5),
strip.text = element_text(face = "bold",
hjust = 0.5)) +
facet_wrap(vars(name)) +
labs(y = "# Genes",
x = "Gene Category",
fill = "In the data base")Cairo::CairoPDF(paste0("../figs/Fig_S4.pdf"),
height = 5,
width = 15)
Fig_s4dev.off()quartz_off_screen
2
Figure 1: The role of ncRNA in gene regulation and connection to the human interactome.
##############################
### Get Genes Category in the neworks to understand the
### pattern of gene-gene regulation
### build a network to viz results
##############################
gPPInc %<>%
simplify(., remove.multiple = F, remove.loops = TRUE)
gPPI %<>%
simplify(., remove.multiple = F, remove.loops = TRUE)
edges = gPPInc %>%
as_data_frame(., what = "edges") %>%
inner_join(Annot_Dic, by = c(from = "Symbol")) %>%
rename(from.type = Category_cl) %>%
inner_join(Annot_Dic, by = c(to = "Symbol")) %>%
rename(to.type = Category_cl) %>%
select(from.type, to.type) %>%
OrderNames() %>%
group_by(from.type, to.type) %>%
summarise(n = n()) %>%
ungroup()
nodes = gPPInc %>%
simplify(., remove.multiple = F, remove.loops = TRUE) %>%
as_data_frame(., what = "vertices") %>%
group_by(Category_cl) %>%
summarise(n = n()) %>%
ungroup()
###########################
#### Export Networks
###########################
save(gPPI, gPPInc, GDA, Annot_Dic, file = "../data/output/graphs.RData")
fig1 = graph_from_data_frame(edges, directed = F, vertices = nodes)
V(fig1)$size = (V(fig1)$n/max(V(fig1)$n) + 0.1) * 50
E(fig1)$width = (E(fig1)$n/max(E(fig1)$n) + 0.1) * 10
Cairo::CairoPDF(paste("../figs/Fig_01.pdf"), width = 12, height = 12)
par(mar=c(0,0,0,0))
plot(fig1)dev.off()quartz_off_screen
2
Text Results: We find that 250 of the 861 diseases are enriched for ncRNAs.
##############################
### Perform Proportion's test and
### multiple correction
##############################
Totals = GDA %>%
mutate(category = ifelse(Category_cl %in% c("Protein Coding", "TF"), "PC", as.character(Category_cl))) %>%
mutate(category = ifelse(category %in% c('lncRNA',
'miRNA',
'ncRNA'),
"NC",
category)) %>%
filter(category %in% c("PC", "NC")) %>%
select(category, hgnc_symbol) %>%
unique() %>%
group_by(category) %>%
summarise(n = n()) %>%
pivot_wider(names_from = category,
values_from = n)
GDA2Prop = GDA %>%
mutate(category = ifelse(Category_cl %in% c("Protein Coding", "TF"), "PC", as.character(Category_cl))) %>%
mutate(category = ifelse(category %in% c('lncRNA',
'miRNA',
'ncRNA'),
"NC",
category)) %>%
filter(category %in% c("PC", "NC")) %>%
select(DiseaseName, category, hgnc_symbol) %>%
unique() %>%
group_by(DiseaseName, category) %>%
summarise(n = n()) %>%
pivot_wider(names_from = category,
values_from = n,
values_fill = 0) %>%
arrange(desc(NC)) %>%
mutate(Total_PC = Totals$PC,
Total_NC = Totals$NC)
GDA2Prop[is.na(GDA2Prop)]<- 0
fishers.test = function(base = GDA2Prop){
tmp = list()
for(i in 1:nrow(base)){
mat = matrix(c(base$PC[i],
base$NC[i], (base$Total_PC[i] - base$PC[i]),
(base$Total_NC[i] - base$NC[i])),
byrow = TRUE,
ncol = 2)
p = fisher.test(mat, alternative = "less")$p.value
tmp[[i]] = data.frame(disease= base$DiseaseName[i], p = p)
}
tmp = bind_rows(tmp)
return(tmp)
}
props.test = function(base = GDA2Prop){
tmp = list()
for(i in 1:nrow(base)){
p = suppressWarnings(prop.test(c(base$NC[i], base$PC[i]),
c(base$Total_NC[i], base$Total_PC[i]),
alternative = "greater",
correct = TRUE))$p.value
tmp[[i]] = data.frame(disease= base$DiseaseName[i], p = p)
}
tmp = bind_rows(tmp)
return(tmp)
}
enrich = props.test()
enrich %<>%
mutate(padj = p.adjust(p, method = "fdr"))
enrich %>%
filter(padj < 0.05) %>%
nrow()[1] 250
Figure 2: Disease modules for Rheumatoid Arthritis (RA), Crohn’s Disease (CD), and Pre-Eclampsia (PE).
##############################
### Build the viz of the disease modules
##############################
leg = data.frame(color = V(gPPInc)$color,
type = V(gPPInc)$Category_cl) %>%
unique()
RAPD = plot_diseases("arthritis rheumatoid")
CDPD = plot_diseases("crohn disease")
PEPD = plot_diseases("pre eclampsia")Cairo::CairoPDF(paste("../figs/Fig_02.pdf"), width = 12, height = 12)
graphics::layout(mat = matrix(c(1,2,3,
4,5,6,
7,7,7), ncol = 3, byrow = T), heights = c(3,3,1))
par(mar = c(0,0,2,0))
g = RAPD$PPI
l = layout_with_fr(g,
weights = E(g)$weight^1/20)
l <- norm_coords(l, ymin=-1,
ymax=1,
xmin=-1,
xmax=1)
g %>%
plot(.,
vertex.size = ifelse(degree(.) == 0, 0.5, (V(.)$size +0.2) *5),
vertex.label.cex = (V(.)$size +0.2)*1,
edge.curved = 0.1,
edge.width = E(.)$width*2 ,
edge.weight = E(.)$weight^100,
layout = l)
title("Rheumatoid Arthritis - PPI")
g = CDPD$PPI
l = layout_with_fr(g,
weights = E(g)$weight^1/20)
l <- norm_coords(l, ymin=-1,
ymax=1,
xmin=-1,
xmax=1)
g %>%
plot(., vertex.size = ifelse(degree(.) == 0, 0.5, (V(.)$size +0.2) *5),
vertex.label.cex = (V(.)$size +0.2)*1,
# vertex.label = NA,
edge.curved = 0.1,
edge.width = E(.)$width*2 ,
edge.weight = E(.)$weight^100,
layout = l)
title("Crohn Disease - PPI")
g = PEPD$PPI
l = layout_with_fr(g,
weights = E(g)$weight^1/20)
l <- norm_coords(l, ymin=-1,
ymax=1,
xmin=-1,
xmax=1)
g %>%
plot(., vertex.size = ifelse(degree(.) == 0, 0.5, (V(.)$size +0.2) *5),
vertex.size = (V(.)$size +0.2) *5,
vertex.label.cex = (V(.)$size +0.2)*1,
# vertex.label = NA,
edge.curved = 0.1,
edge.width = E(.)$width*2 ,
edge.weight = E(.)$weight^100)
title("Pre Eclampsia - PPI")
g = RAPD$NCIPPI %>%
delete.edges(E(.)[E(.)$width^1.65 < 0.01]) %>%
delete.edges(E(.)[E(.)$width^1/2 < 0.03])
l = layout_with_fr(g,
weights = E(g)$weight^1/20)
l <- norm_coords(l, ymin=-1,
ymax=1,
xmin=-1,
xmax=1)
g %>%
plot(., vertex.size = ifelse(degree(.) == 0, 0.5, (V(.)$size +0.2) *5),
vertex.label.cex = (V(.)$size +0.2)*1,
# vertex.label = NA,
edge.curved = 0.1,
edge.width = (E(.)$width^2),
edge.weight = E(.)$weight^1/20,
layout = l)
title("Rheumatoid Arthritis - PPI & NCI")
g = CDPD$NCIPPI %>%
delete.edges(E(.)[E(.)$width^1.65 < 0.01]) %>%
delete.edges(E(.)[E(.)$width^1/2 < 0.03])
l = layout_with_fr(g,
weights = E(g)$weight^1/20)
l <- norm_coords(l, ymin=-1,
ymax=1,
xmin=-1,
xmax=1)
g %>%
plot(., vertex.size = ifelse(degree(.) == 0, 0.5, (V(.)$size +0.2) *5),
vertex.label.cex = (V(.)$size +0.2)*1,
# vertex.label = NA,
edge.curved = 0.1,
edge.width = (E(.)$width^2),
edge.weight = E(.)$weight^1/20,
layout = l)
title("Crohn Disease PPI & NCI")
g = PEPD$NCIPPI %>%
delete.edges(E(.)[E(.)$width^1.65 < 0.01]) %>%
delete.edges(E(.)[E(.)$width^1/2 < 0.03])
l = layout_with_fr(g,
weights = E(g)$weight^1/2)
# l <- norm_coords(l, ymin=-1,
# ymax=1,
# xmin=-1,
# xmax=1)
g %>%
plot(., vertex.size = ifelse(degree(.) == 0, 0.5, (V(.)$size +0.2) *5),
vertex.label.cex = (V(.)$size +0.2)*1,
# vertex.label = NA,
edge.curved = 0.1,
edge.width = (E(.)$width^2),
edge.weight = E(.)$weight^1/20,
layout = l)
title("Pre Eclampsia PPI & NCI")
plot(1, 1, xlim = c(20,30),
axes = F,
xlab = "",
ylab = "")
legend("bottom",
cex = 1.5,
# xpd = TRUE,
legend = leg$type,
col = leg$color,
pch = 19,
bty = "n",
ncol = length(leg$color)
)dev.off()quartz_off_screen
2
GDA databases
GDA_tmp = GDA %>%
select(DiseaseName, hgnc_symbol, evidence = evidences)
GDA_tmp = GDA_tmp$evidence %>%
stringr::str_split(., "\\|", simplify = TRUE) %>%
as.data.frame() %>%
cbind(GDA_tmp, . ) %>%
select(-evidence) %>%
pivot_longer(., -c(DiseaseName, hgnc_symbol), values_to = "database" ) %>%
filter(database != "") %>%
select(-name)Figure S6: Disease Associated genes and their classification.
Figure S6A: Rainplot of Number of disease-associated genes classified by gene category
##############################
### Rainplot of Number of disease-associated
### genes classified by gene category.
##############################
GDA_CAT = GDA %>%
select(DiseaseName, Category_cl, hgnc_symbol) %>%
unique() %>%
group_by(DiseaseName, Category_cl) %>%
summarise(n = n()) %>%
arrange(desc(n))
GDA_CAT$Category_cl[GDA_CAT$Category_cl == ""] <- "other"
GDA_CAT$Category_cl %<>% factor(.,
levels = c("Protein Coding",
"TF",
"Pseudogene",
"lncRNA",
"miRNA",
"other"))
fig_s6A = GDA_CAT %>%
filter(!(Category_cl %in% "other")) %>%
ggplot() +
aes(x = Category_cl,
y = n,
fill = Category_cl,
color = Category_cl) +
geom_half_violin(adjust = 1.5L,
scale = "width",
alpha = 0.5,
side = "r",
position = position_nudge(x = 0.35, y = 0)) +
geom_point(
position = position_jitterdodge(),
shape = "circle",
size = .25,
alpha = 1) +
geom_boxplot(shape = "circle",
alpha = 0.1,
width = 0.25,
outlier.shape = NA) +
# expand_limits(x = 5.25) +
scale_color_manual(values = values) +
scale_fill_manual(values = values) +
scale_y_continuous(trans = "log10") +
coord_flip() +
theme_minimal() +
labs(x = "Gene Class",
y = "# of disease associated genes") +
theme (legend.position = "none",
axis.title = element_text(face = "bold", hjust = 0.5),
axis.title.x.top =element_text(face = "bold", hjust = 0.5),
strip.text = element_text(face = "bold", hjust = 0.5)); fig_s6AFigure S6B: Degree distribution of gene-disease associations.
##############################
### Degree distribution of gene-disease associations.
##############################
DisDeg = GDA %>%
group_by(hgnc_symbol, Category_cl) %>%
summarise(degree = n()) %>%
ungroup() %>%
group_by(Category_cl, degree) %>%
summarise(n = n())
fig_s6B = DisDeg %>%
filter(!(Category_cl %in% "other")) %>%
filter(!(Category_cl %in% "")) %>%
ggplot() +
aes(
x = degree,
y = n,
fill = Category_cl,
colour = Category_cl
) +
geom_point(shape = "circle", size = 1.5, alpha = .5) +
scale_color_manual(values = values) +
scale_fill_manual(values =
values) +
scale_x_continuous(trans = "log10") +
scale_y_continuous(trans = "log10",
limits = c(1, 3000)) +
theme_minimal() +
theme(legend.position = "bottom") +
labs(x = "Degree", y = "#", fill = "Gene Category", color = "Gene Category")+
theme (legend.position = "none",
axis.title = element_text(face = "bold", hjust = 0.5),
axis.title.x.top =element_text(face = "bold", hjust = 0.5),
strip.text = element_text(face = "bold", hjust = 0.5)); fig_s6BFigure S6C: The % of genes in each category found in the LCC.
##############################
### LCC is generated in 01_LCC_Sab.R
### The % of genes in each category found in the LCC.
##############################
Out = fread("../data/output/LCC.csv")
Out %<>% filter(disease %in% GDA$DiseaseName)
Out %<>%
group_by(graph) %>%
mutate(padj = p.adjust(p, method = "fdr")) %>%
ungroup() %>%
mutate(graph = ifelse(graph == "gPPInc", "PPI & NCI", graph)) %>%
mutate(graph = ifelse(graph == "gPPI", "PPI", graph))
PPI_diseases = Out %>%
arrange(disease) %>%
pull(disease) %>% unique()
pb <- progress_bar$new(
format = " Computed [:bar] :percent eta: :eta",
total = length(PPI_diseases),
clear = FALSE,
width= 60)
LCCs_Distribution_Genes = list()
for (i in 1:length(PPI_diseases)){
pb$tick()
tmp_g = GDA %>%
filter(DiseaseName %in% PPI_diseases[i]) %>%
select(hgnc_symbol, Category_cl)
tmp_ppi = gPPI %>%
induced.subgraph(., tmp_g$hgnc_symbol[tmp_g$hgnc_symbol %in% V(.)$name]) %>%
extract_LCC() %>%
as_data_frame("vertices") %>%
mutate(disease = PPI_diseases[i]) %>%
mutate(graph = "PPI")
tmp_ncppi = gPPInc %>%
induced.subgraph(., tmp_g$hgnc_symbol[tmp_g$hgnc_symbol %in% V(.)$name]) %>%
extract_LCC() %>%
as_data_frame("vertices") %>%
mutate(disease = PPI_diseases[i]) %>%
mutate(graph = "PPI & NCI")
LCCs_Distribution_Genes[[i]] = bind_rows(tmp_ppi, tmp_ncppi)
}
LCCs_Distribution_Genes %<>% bind_rows()
LCCs_Distribution_Genes_summary = LCCs_Distribution_Genes %>%
group_by(Category_cl, disease, graph) %>%
summarise(in_LCC = n()) %>%
rename(DiseaseName = disease) %>%
pivot_wider(names_from = graph,
values_from = in_LCC,
values_fill = 0,
names_prefix = "in_LCC_")
LCCs_Distribution_Genes_summary %<>%
dplyr::left_join(., GDA_CAT) %>%
mutate(prop_LCC_PPINCI = (`in_LCC_PPI & NCI`/n)*100,
prop_LCC_PPI = (`in_LCC_PPI`/n)*100)
LCCs_Distribution_Genes_summary_l = LCCs_Distribution_Genes_summary %>%
select(Category_cl, DiseaseName, starts_with("prop")) %>%
pivot_longer(-c(1:2))
LCCs_Distribution_Genes_summary_l$Category_cl %<>% factor(.,
levels = c("Protein Coding",
"TF",
"Pseudogene",
"lncRNA",
"miRNA",
"other"))
fig_s6C = LCCs_Distribution_Genes_summary_l %>%
mutate(graph = ifelse(name == "prop_LCC_PPINCI", "PPI & NCI", "PPI")) %>%
filter(Category_cl != "other") %>%
ggplot() +
aes(x = value,
y = Category_cl,
fill = graph,
color = graph) +
geom_boxplot(alpha = 0.5) +
scale_fill_manual(values = PPI_colors) +
scale_color_manual(values = PPI_colors) +
labs(
x = "% of genes in the LCC",
y = "Gene Category",
fill = "Network",
colour = "Network"
) +
theme_minimal() +
theme(legend.position = "bottom",
axis.title = element_text(
face = "bold",
hjust = 0.5),
axis.title.x.top =element_text(
face = "bold",
hjust = 0.5),
strip.text = element_text(
face = "bold",
hjust = 0.5))Cairo::CairoPDF("../figs/Fig_S6.pdf", width = 15, height = 6)
fig_s6A + fig_s6B + fig_s6C +
plot_annotation(tag_levels = 'A')dev.off()quartz_off_screen
2
Figure 3: Uncovering Gene-Disease Associations.
Disease Modules are computed in 01_LCC_Sab.R.
Figure 3A: Euler Plot
##############################
### Overlap of significant diseases in the PPI and the PPI & NCI
### Euler plot: Similar to the usual Venn Diagram,
### but with proportional areas
##############################
Euler = list()
Euler$PPI = Out %>%
filter(padj < lccp) %>%
filter(graph == "PPI") %>%
pull(disease)
Euler$`PPINCI` = Out %>%
filter(padj < lccp) %>%
filter(graph == "PPI & NCI") %>%
pull(disease)
require(ggforce)
teste_eu = eulerr::euler(Euler)
fig_3A = teste_eu$ellipses %>%
mutate(r = a/2) %>%
mutate(x = h) %>%
mutate(y = k) %>%
mutate(total = c(35, 132)) %>%
mutate(labels = c("PPI", "PPI & NCI")) %>%
ggplot() +
geom_circle(aes(x0 = x, y0 = y, r = r,
fill=labels,
color = labels),
alpha = .5) +
geom_text(aes(x = x, y = y, label = labels)) +
geom_text(aes(x = x, y = y - 0.5, label = total)) +
scale_fill_manual(values = PPI_colors) +
scale_color_manual(values = PPI_colors) +
coord_fixed() +
theme_void() +
theme(legend.position = "none",
text = element_text(size = 18,
family = "HelveticaNeue"),
strip.text = element_text(face = "bold",
hjust = 0.5,
family = "HelveticaNeue"))
fig_3AFigure 3B: Dumbell Plot
##############################
### Dumbell plot for rLCC size in the PPI
### and in the PPI & NCI
##############################
PPI_diseases = c("insulin resistance",
"astrocytoma",
"thymoma")
NCIPPI_diseases = c('alopecia', 'anxiety disorders',
'autoimmune diseases', 'carcinoma endometrioid',
'celiac disease',
'cleft lip', 'dementia',
'diabetes gestational', 'glaucoma',
'myocarditis', 'polycystic ovary syndrome',
'pre eclampsia', "testicular neoplasms",
"pancreatitis" , "osteochondrodysplasias" ,
"neutropenia" , "retinitis pigmentosa")
both_diseases = c('thrombosis', 'stroke',
'psoriasis', 'parkinson disease',
'osteoarthritis', 'neuroblastoma',
'myocardial infarction', 'mood disorders',
'melanoma', 'inflammatory bowel diseases',
'glioma', 'endometriosis', 'bipolar disorder')
Dis_class = GDA %>%
select(DiseaseName, DiseaseClass = DescriptorName) %>%
unique() %>%
rename(disease = DiseaseName)
Dis_class_dic = Dis_class$DiseaseClass %>%
stringr::str_split("\\|", simplify = T) %>%
as.data.frame() %>%
mutate(DiseaseName = Dis_class$disease) %>%
pivot_longer(., - DiseaseName) %>%
filter(value != "") %>%
select( - name) %>%
group_by(value) %>%
mutate(n = n()) %>%
ungroup() %>%
group_by(DiseaseName) %>%
mutate(max = max(n)) %>%
filter(n == max) %>%
mutate(max_dis = max(value)) %>%
filter(max_dis == value) %>%
unique() %>%
select(1:2) %>%
group_by(value) %>%
mutate(n = n()) %>%
mutate(DiseaseClass =
ifelse(n < 20, "other", value)) %>%
ungroup()
prepare_dumbell = Out %>%
dplyr::inner_join(Dis_class) %>%
mutate(rLCC = LCC/Targets * 100) %>%
select(disease, graph, rLCC, DiseaseClass, padj) %>%
mutate(label = ifelse(disease %in% Interest, disease, NA)) %>%
mutate(size = ifelse(disease %in% Interest, 0.5, 2)) %>%
mutate(alpha = ifelse(disease %in% Interest, 0.2, 1))
levels_dis = prepare_dumbell %>%
filter(disease %in% c(Interest, PPI_diseases,
NCIPPI_diseases, both_diseases)) %>%
group_by(disease) %>%
summarise(maxrLCC = max(rLCC)) %>%
arrange(maxrLCC) %>%
pull(disease) %>%
unique()
fig_3B = prepare_dumbell %>%
filter(disease %in% c(Interest, PPI_diseases,
NCIPPI_diseases, both_diseases)) %>%
mutate(disease = factor(disease, levels = levels_dis)) %>%
mutate(sign = ifelse(padj < lccp, "padj < 0.05", "padj > 0.05")) %>%
ggplot(.) +
aes(x = rLCC,
y = disease,
color = graph) +
geom_vline(xintercept = 50, color = "#d62828", alpha = 0.3) +
geom_line(aes(group = disease), linewidth = 1, color = "#a1a5a4") +
geom_point(shape = 19, size = 5, color = "white") +
geom_point(aes(shape = sign), size = 5) +
scale_color_manual(values = PPI_colors)+
scale_x_continuous(limits = c(0, 100)) +
scale_shape_manual(values = c(19, 1))+
theme_minimal()+
theme(legend.position = "bottom") +
labs(x = "rLCC (%)", y = "Disease", shape = element_blank(), color = element_blank())+
theme(legend.position = "none",
text = element_text(size = 18,
family = "HelveticaNeue"),
axis.title = element_text(face = "bold",
hjust = 0.5,
family = "HelveticaNeue"),
axis.title.x.top = element_text(face = "bold",
hjust = 0.5,
family = "HelveticaNeue"),
strip.text = element_text(face = "bold",
hjust = 0.5,
family = "HelveticaNeue"))
fig_3BFigure 3C: Histogram
##############################
### Histogram of the LCCs in the PPI and in the PPI & NCI
##############################
fig_3C = Out %>%
mutate(label = ifelse(disease %in% Interest, disease, NA)) %>%
mutate(graph = factor(graph,
levels = c("PPI",
"PPI & NCI"))) %>%
filter(padj < lccp) %>%
filter(graph != "NCI") %>%
ggplot() +
aes(x = LCC,
fill = graph,
colour = graph,
size = padj,
label = label) +
geom_histogram(bins = 100, alpha = 0.3) +
geom_text_repel( y = 0,
min.segment.length = 0,
seed = 777,
size = 5,
nudge_x = 2,
box.padding = 1,
nudge_y = 2,
segment.curvature = -0.1,
segment.ncp = 30,
segment.angle = 20) +
scale_fill_manual(
values = PPI_colors
) +
scale_color_manual(
values = PPI_colors
) +
scale_x_continuous(trans = "log10",
labels = scales::label_number())+
theme_minimal() +
labs(y = "Frequency",
x = "LCC size") +
facet_wrap(vars(graph), scales = "free_y", ncol = 1L)+
theme(legend.position = "none",
text = element_text(size = 18,
family = "HelveticaNeue"),
axis.title = element_text(face = "bold",
hjust = 0.5,
family = "HelveticaNeue"),
axis.title.x.top = element_text(face = "bold",
hjust = 0.5,
family = "HelveticaNeue"),
strip.text = element_text(face = "bold",
hjust = 0.5,
family = "HelveticaNeue"));
fig_3CFigure 3D: Scatterplot pvalue
##############################
### Scatterplot of the p-values
### in the PPI and the PPI & NCI
##############################
colors = paste0("#",c("ef476f","f36a6d","f78c6b",
"fbaf69","fdc068","ffd166",
"88ae8c","4d9c9f",
"118ab2","0f96ae",
"0ca1a9","0ab594",
"06d6a0","a1a5a4"))
names(colors) = c(
"Cardiovascular Diseases",
"Congenital, Hereditary, and Neonatal Diseases and Abnormalities",
"Digestive System Diseases" ,
"Female Urogenital Diseases and Pregnancy Complications",
"Hemic and Lymphatic Diseases",
"Mental Disorders" ,
"Musculoskeletal Diseases" ,
"Neoplasms" ,
"Nervous System Diseases",
"Nutritional and Metabolic Diseases" ,
"Pathological Conditions, Signs and Symptoms" ,
"Respiratory Tract Diseases" ,
"Skin and Connective Tissue Diseases",
"other"
)
colors_original = colors
labels_colors = c(
"Cardiovascular",
"Congenital",
"Digestive" ,
"Female",
"Lymphatic",
"Mental Disorders" ,
"Musculoskeletal" ,
"Neoplasms" ,
"Nervous System",
"Metabolic" ,
"Pathologies" ,
"Respiratory" ,
"Skin Tissue",
"Other"
)
Out2 = Out %>%
select(Targets,
disease,
graph,
LCC,
p,
padj) %>%
pivot_wider(names_from = c(graph),
values_from = c(p, padj, LCC, Targets)) %>%
na.exclude() %>%
janitor::clean_names()
Out2 %<>% left_join(., Dis_class_dic %>%
select(disease = DiseaseName,
DiseaseClass = value)) %>%
group_by(DiseaseClass) %>%
mutate(n = n()) %>%
mutate(DiseaseClass= ifelse(n > 15, DiseaseClass, "Other")) %>%
ungroup()
fig_3D = Out2 %>%
mutate(label = ifelse(disease %in% Interest, disease, NA)) %>%
mutate(p_ppi_nci = ifelse(padj_ppi_nci < 0.00001, 0.00001, padj_ppi_nci)) %>%
mutate(p_ppi = ifelse(padj_ppi < 0.00001, 0.00001, padj_ppi)) %>%
ggplot() +
aes(y = p_ppi_nci,
x = p_ppi,
size = lcc_ppi_nci,
label = label,
color = DiseaseClass) +
geom_abline(slope = 1, color = "gray80") +
geom_hline(yintercept = 0.05, color = "#d62828", alpha = 0.3) +
geom_vline(xintercept = 0.05, color = "#d62828", alpha = 0.3) +
geom_point(shape = "circle",
alpha = 0.5,
show.legend = F
) +
geom_text_repel(min.segment.length = 0,
seed = 777,
size = 5, nudge_x = .15,
box.padding = 0.5,
nudge_y = 1,
segment.curvature = -0.1,
segment.ncp = 3,
segment.angle = 20) +
scale_size(range = c(0,3), trans = "log2") +
scale_y_continuous(trans = "log10",
labels = scales::label_scientific())+
scale_x_continuous(trans = "log10",
labels = scales::label_scientific())+
labs(y = "pvalue PPI & NCI (FDR corrected)",
x = "pvalue PPI (FDR corrected)",
size = "LCC PPI & NCI",
color = NULL) +
scale_color_manual(values = colors)+
theme_minimal() +
theme(legend.position = "none",
text = element_text(size = 18,
family = "HelveticaNeue"),
axis.title = element_text(face = "bold",
hjust = 0.5,
family = "HelveticaNeue"),
axis.title.x.top = element_text(face = "bold",
hjust = 0.5,
family = "HelveticaNeue"),
strip.text = element_text(face = "bold",
hjust = 0.5,
family = "HelveticaNeue")) ; fig_3DFigure 3E: Scatterplot Genes vs LCC
##############################
### Genes associated with disease and the LCC size
##############################
prepare_scatter_3e = GDA %>%
select(disease = DiseaseName, Total_Genes) %>%
unique() %>%
inner_join(Out) %>%
mutate(rLCC = (LCC/Targets)*100) %>%
select(disease, Total_Genes, LCC, rLCC, graph)
fig_3E = prepare_scatter_3e %>%
group_by(graph) %>%
mutate(max_rlcc = max(Total_Genes)) %>%
mutate(label = ifelse(Total_Genes == max_rlcc, graph, NA)) %>%
mutate(label = ifelse(disease %in% Interest, disease, label)) %>%
filter(!(graph %in% "NCI")) %>%
ggplot() +
aes(x = Total_Genes,
y = LCC,
size = rLCC,
colour = graph,
fill = graph,
label = label) +
geom_point(shape = "circle", alpha = 0.5) +
geom_text_repel(min.segment.length = 0,
seed = 777,
size = 5,
nudge_x = 1,
box.padding = 0.5,
nudge_y = 1,
segment.curvature = -0.1,
segment.ncp = 3,
segment.angle = 20) +
geom_smooth(se = TRUE) +
geom_abline(slope = 1, color = "gray80") +
scale_color_manual(values = PPI_colors) +
scale_fill_manual(values = PPI_colors) +
scale_size(range = c(0, 3), guide = "none") +
theme_minimal() +
labs(x = "# Genes", y = "LCC")+
theme(legend.position = "none",
text = element_text(size = 18,
family = "HelveticaNeue"),
axis.title = element_text(face = "bold",
hjust = 0.5,
family = "HelveticaNeue"),
axis.title.x.top = element_text(face = "bold",
hjust = 0.5,
family = "HelveticaNeue"),
strip.text = element_text(face = "bold",
hjust = 0.5,
family = "HelveticaNeue")); fig_3EFigure 3F: Half Violins
##############################
### Proportion of non-coding genes.
##############################
LCC_NC = Out2 %>%
dplyr::inner_join(GDA2Prop, by = c("disease"= "DiseaseName")) %>%
mutate(ratio_ncc = NC / (NC + PC))
l = LCC_NC %>%
group_by(DiseaseClass) %>%
summarise(min = min(ratio_ncc),
max = max(ratio_ncc),
med = median(ratio_ncc)) %>%
arrange(max, med) %>%
select(DiseaseClass)
l %<>% dplyr::inner_join(.,
data.frame(DiseaseClass =
names(colors),
label = labels_colors))
colors_original = colors
names(colors) = labels_colors
LCC_NC$DiseaseClass[LCC_NC$DiseaseClass %ni% l$DiseaseClass] <- "Other"
fig_3F = LCC_NC %>%
mutate(label =
ifelse(disease %in% Interest, disease, NA)) %>%
mutate(DiseaseClass = factor(DiseaseClass,
levels = c(l$DiseaseClass, "Other"),
labels = c(l$label, "Other"))) %>%
ggplot() +
aes(
x = DiseaseClass,
y = ratio_ncc * 100,
fill = DiseaseClass,
colour = DiseaseClass,
label = label
) +
geom_half_violin( scale = "width",
side = "r",
alpha = 0.5) +
geom_half_point(size = 0.3,
show.legend = F,
width = 0.5) +
geom_text_repel(min.segment.length = 0,
seed = 777,
size = 5,
nudge_x = 1,
box.padding = 0.5,
nudge_y = 1,
segment.curvature = -0.1,
segment.ncp = 3,
segment.angle = 20) +
labs(
y = "Non Coding Genes Ratio (%)",
x = "Disease Class",
fill = NULL,
color = NULL
) +
scale_y_continuous(limits = c(0, 100)) +
scale_color_manual(values = colors) +
scale_fill_manual(values = colors) +
theme_minimal() +
coord_flip() +
theme(legend.position = "none",
text = element_text(size = 18,
family = "HelveticaNeue"),
axis.title = element_text(face = "bold",
hjust = 0.5,
family = "HelveticaNeue"),
axis.title.x.top = element_text(face = "bold",
hjust = 0.5,
family = "HelveticaNeue"),
strip.text = element_text(face = "bold",
hjust = 0.5,
family = "HelveticaNeue"))
fig_3FFigure 4: Physically-Interacting genes are co-expressed.
The data is created in 02_Corr.R and performance is evaluated in 03_Corr_Performance.
Figure 5: Disease similarity on the two networks.
Disease Similarities are computed in 01_LCC_Sab.R.
Figure 5A: RA comorbidities
##############################
### Comorbidities data is derived from
### doi: 10.1371/journal.pcbi.1000353
##############################
TDN = fread("../data/input/Comorbidities_RR.csv") %>%
janitor::clean_names() %>%
rename(x = disease_1,
y = disease_2,
como_p = p_12) %>%
rename(RR = `rel_risk_12`) %>%
mutate(percent = (co_12/(prev_1 + prev_2 - co_12))*100)
GeneOverlap = fread("../data/output/SAB_JAC.csv")
GeneOverlap %<>%
mutate(neg = ifelse(Sab < 0, "sab<0", "sab>0")) %>%
group_by(graph, neg) %>%
mutate(Sab_norm = CoDiNA::normalize(abs(Sab))) %>%
ungroup() %>%
select(-neg) %>%
group_by(graph) %>%
mutate(Jac_p = 1-p) %>%
select(-p) %>%
rename(Sab_p = pvalue_lt) %>%
mutate(graph = ifelse(graph == "gPPInc", "PPI & NCI", graph)) %>%
mutate(graph = ifelse(graph == "gPPI", "PPI", graph))
GeneOverlap2 = GeneOverlap %>%
filter((x %in% Euler$PPI & y %in% Euler$PPI & graph %in% "PPI") | (x %in% Euler$PPINCI & y %in% Euler$PPINCI & graph %in% "PPI & NCI") ) %>%
left_join(TDN) %>%
group_by(graph) %>%
mutate(como_padj = p.adjust(como_p, method = "fdr")) %>%
mutate(Sab_padj = p.adjust(Sab_p, method = "fdr")) %>%
mutate(Jac_padj = p.adjust(Jac_p, method = "fdr")) %>%
ungroup() %>%
mutate(sign_sab_jac_sabn = ifelse(Jac_padj < overlapp &
Sab < 0 &
Sab_norm >= 0.05 &
Sab_p < sabp &
Jaccard.Index > 0
, "Jac & Sab & Sabn: sign", "Jac & Sab & Sabn: not sign")) %>%
mutate(sign_sab_p = ifelse(Sab_p < sabp &
Sab < 0 , "sab: sign", "sab: not sign")) %>%
mutate(sign_sab = ifelse(Sab < 0 , "sab < 0", "sab > 0")) %>%
mutate(sign_jac = ifelse(Jaccard.Index > 0 & Jac_padj < overlapp , "jac == 0", "jac > 0")) %>%
mutate(group = paste(x, y, sep = " -- ")) %>%
mutate(sign_como = ifelse(como_padj < comop, "Como: sign", "Como: not sign")) %>%
mutate(sign_sab_jac = ifelse(Jaccard.Index > 0 & Jac_p < overlapp & Sab < 0 & Sab_p < sabp , "Sab & Jac: sign", "Sab & Jac: not sign"))
GeneOverlap2$graph %<>%
factor(., levels = c( "PPI", "PPI & NCI" ))
GeneOverlap_raw = GeneOverlap2 %>%
filter(!is.na(RR))
table(GeneOverlap_raw$sign_sab_jac_sabn, GeneOverlap_raw$sign_como)
Como: not sign Como: sign
Jac & Sab & Sabn: not sign 9215 7712
Jac & Sab & Sabn: sign 193 282
nodes_sab_alt = Out %>%
filter(padj < lccp) %>%
filter(!(disease %like% "experimental")) %>%
filter(!(disease %like% "chronic")) %>%
filter(!(disease %like% "diffuse")) %>%
filter(graph %in% c("PPI", "PPI & NCI")) %>%
mutate(rLCC = LCC/Targets) %>%
select(disease, graph, rLCC) %>%
pivot_wider(names_from = graph,
values_from = rLCC,
names_prefix = "rLCC") %>%
mutate(PPI_sig = ifelse(!is.na(rLCCPPI),1,0))%>%
mutate(PPI_NCI_sig = ifelse(!is.na(`rLCCPPI & NCI`),1,0)) %>%
dplyr::left_join(., Dis_class_dic, by = c("disease" = "DiseaseName")) %>%
dplyr::left_join(., data.frame(DiseaseClass = names(colors_original), color = colors_original), by = c("DiseaseClass" = "DiseaseClass"))
Sabs_Alternative = GeneOverlap2 %>%
filter(graph %in% c("PPI", "PPI & NCI")) %>%
mutate(PPI_DIS = ifelse(graph == "PPI" & x %in% Euler$PPI & y %in% Euler$PPI, 1,0)) %>%
mutate(NCIPPI_DIS = ifelse(graph == "PPI & NCI" & x %in% Euler$PPINCI & y %in% Euler$PPINCI, 1,0)) %>%
filter(NCIPPI_DIS == 1 | PPI_DIS == 1) %>%
filter(Sab < 0) %>%
group_by(graph) %>%
filter(Sab_norm > 0.07) %>%
ungroup() %>%
filter(Jaccard.Index > 0.05) %>%
filter(Sab_p < sabp) %>%
filter(Jac_padj < overlapp) %>%
filter(x %in% nodes_sab_alt$disease) %>%
filter(y %in% nodes_sab_alt$disease)Figure 5B: RA comorbidities Expanded
##############################
### Create the full SAB network
##############################
g_sabs_alt = graph_from_data_frame(Sabs_Alternative,
directed = F,
nodes_sab_alt) %>%
delete.vertices(., degree(.) == 0 )
vs = V(g_sabs_alt)
values <- lapply(1:length(vs), function(x) c(V(g_sabs_alt)$PPI_sig[x], V(g_sabs_alt)$PPI_NCI_sig[x]))
V(g_sabs_alt)$frame.color = NA
E(g_sabs_alt)$color = ifelse(E(g_sabs_alt)$graph == "PPI & NCI", "#ABE4ED", "#B998AE")
E(g_sabs_alt)$width = abs(E(g_sabs_alt)$Sab) * 2
E(g_sabs_alt)$weight = abs(E(g_sabs_alt)$Sab)^ 2
V(g_sabs_alt)$size = degree(g_sabs_alt) %>%
CoDiNA::normalize()
V(g_sabs_alt)$size = (V(g_sabs_alt)$size +0.5)*5
##############################
### Filter for RA
##############################
diseases_2 = g_sabs_alt %>%
neighborhood(., nodes = "arthritis rheumatoid")
diseases_2[[1]]
+ 32/515 vertices, named, from 22f333a:
[1] arthritis rheumatoid colonic neoplasms
[3] lung neoplasms atherosclerosis
[5] carcinoma non small cell lung inflammatory bowel diseases
[7] neoplasms inflammation
[9] melanoma diabetes mellitus type 1
[11] diabetes mellitus type 2 adenocarcinoma
[13] ovarian neoplasms pancreatic neoplasms
[15] stroke osteoarthritis
[17] heart failure carcinoma renal cell
[19] urinary bladder neoplasms glioma
+ ... omitted several vertices
g_dis_zoom = g_sabs_alt %>%
delete.vertices(., V(.)$name %ni% names(unlist(diseases_2)))
V(g_dis_zoom)$label = V(g_dis_zoom)$name
vs = V(g_dis_zoom)
values <- lapply(1:length(vs), function(x) c(V(g_dis_zoom)$PPI_sig[x], V(g_dis_zoom)$PPI_NCI_sig[x]))
V(g_dis_zoom)$size = degree(g_dis_zoom) %>%
CoDiNA::normalize()
V(g_dis_zoom)$size = (V(g_dis_zoom)$size +0.5)*8
Cairo::CairoPDF("../figs/Fig_05B.pdf",
width = 15, height = 10)
par(mfrow = c(1,2),
mar = c(1, 5, 1, 1))
ll = g_dis_zoom %>%
layout_with_fr(., weights = abs(E(.)$Sab) * 10,
start.temp = 10) %>%
igraph::norm_coords()
V(g_dis_zoom)$label.color = "#686868"
V(g_dis_zoom)$label = V(g_dis_zoom)$name
vs = V(g_dis_zoom)
values <- lapply(1:length(vs), function(x) c(V(g_dis_zoom)$PPI_sig[x], V(g_dis_zoom)$PPI_NCI_sig[x]))
E(g_dis_zoom)[!("arthritis rheumatoid" %--% 1:1000)]$color = adjustcolor("gray75", alpha.f = 0.3)
g_dis_zoom %>%
delete_edges(., E(.)[E(.)$PPI_DIS == 0])%>%
plot(,
vertex.shape="pie",
vertex.label.dist=1,
vertex.pie=values,
edge.curved = 0.1,
edge.width = abs(E(.)$Sab) * 15,
layout = ll,
vertex.pie.color=list(PPI_colors))
title("PPI")
g_dis_zoom %>%
delete_edges(., E(.)[E(.)$NCIPPI_DIS == 0])%>%
plot(,
vertex.shape="pie",
vertex.label.dist=1,
vertex.pie=values,
edge.curved = 0.1,
edge.width = abs(E(.)$Sab) * 10,
layout = ll,
vertex.pie.color=list(PPI_colors))
title("PPI & NCI")dev.off()quartz_off_screen
2
Figure 5C: Full Comorbidities Map
g_sabs_alt %>%
delete_edges(., E(.)[E(.)$PPI_DIS == 1]) %>%
delete.vertices(., degree(.) == 0)IGRAPH 93b9e51 UNW- 466 2659 --
+ attr: name (v/c), rLCCPPI (v/n), rLCCPPI & NCI (v/n), PPI_sig (v/n),
| PPI_NCI_sig (v/n), value (v/c), n (v/n), DiseaseClass (v/c), color
| (v/c), frame.color (v/l), size (v/n), graph (e/c), Jaccard.Index
| (e/n), Sab (e/n), Sab_p (e/n), Sab_norm (e/n), Jac_p (e/n), co_12
| (e/n), prev_1 (e/n), prev_2 (e/n), total (e/n), RR (e/n), lwr_ci_12
| (e/n), upr_ci_12 (e/n), como_p (e/n), rel_risk_21 (e/n), lwr_ci_21
| (e/n), upr_ci_21 (e/n), p_21 (e/n), percent (e/n), como_padj (e/n),
| Sab_padj (e/n), Jac_padj (e/n), sign_sab_jac_sabn (e/c), sign_sab_p
| (e/c), sign_sab (e/c), sign_jac (e/c), group (e/c), sign_como (e/c),
| sign_sab_jac (e/c), PPI_DIS (e/n), NCIPPI_DIS (e/n), color (e/c),
| width (e/n), weight (e/n)
+ edges from 93b9e51 (vertex names):
g_sabs_alt %>%
delete_edges(., E(.)[E(.)$PPI_DIS == 1]) %>%
extract_LCC()IGRAPH c644c04 UNW- 411 2620 --
+ attr: name (v/c), rLCCPPI (v/n), rLCCPPI & NCI (v/n), PPI_sig (v/n),
| PPI_NCI_sig (v/n), value (v/c), n (v/n), DiseaseClass (v/c), color
| (v/c), frame.color (v/l), size (v/n), graph (e/c), Jaccard.Index
| (e/n), Sab (e/n), Sab_p (e/n), Sab_norm (e/n), Jac_p (e/n), co_12
| (e/n), prev_1 (e/n), prev_2 (e/n), total (e/n), RR (e/n), lwr_ci_12
| (e/n), upr_ci_12 (e/n), como_p (e/n), rel_risk_21 (e/n), lwr_ci_21
| (e/n), upr_ci_21 (e/n), p_21 (e/n), percent (e/n), como_padj (e/n),
| Sab_padj (e/n), Jac_padj (e/n), sign_sab_jac_sabn (e/c), sign_sab_p
| (e/c), sign_sab (e/c), sign_jac (e/c), group (e/c), sign_como (e/c),
| sign_sab_jac (e/c), PPI_DIS (e/n), NCIPPI_DIS (e/n), color (e/c),
| width (e/n), weight (e/n)
+ edges from c644c04 (vertex names):
g_sabs_alt %>%
delete_edges(., E(.)[E(.)$PPI_DIS == 0])%>%
delete.vertices(., degree(.) == 0)IGRAPH b2e958b UNW- 350 543 --
+ attr: name (v/c), rLCCPPI (v/n), rLCCPPI & NCI (v/n), PPI_sig (v/n),
| PPI_NCI_sig (v/n), value (v/c), n (v/n), DiseaseClass (v/c), color
| (v/c), frame.color (v/l), size (v/n), graph (e/c), Jaccard.Index
| (e/n), Sab (e/n), Sab_p (e/n), Sab_norm (e/n), Jac_p (e/n), co_12
| (e/n), prev_1 (e/n), prev_2 (e/n), total (e/n), RR (e/n), lwr_ci_12
| (e/n), upr_ci_12 (e/n), como_p (e/n), rel_risk_21 (e/n), lwr_ci_21
| (e/n), upr_ci_21 (e/n), p_21 (e/n), percent (e/n), como_padj (e/n),
| Sab_padj (e/n), Jac_padj (e/n), sign_sab_jac_sabn (e/c), sign_sab_p
| (e/c), sign_sab (e/c), sign_jac (e/c), group (e/c), sign_como (e/c),
| sign_sab_jac (e/c), PPI_DIS (e/n), NCIPPI_DIS (e/n), color (e/c),
| width (e/n), weight (e/n)
+ edges from b2e958b (vertex names):
g_sabs_alt %>%
delete_edges(., E(.)[E(.)$PPI_DIS == 0]) %>%
extract_LCC()IGRAPH 1477e3e UNW- 258 477 --
+ attr: name (v/c), rLCCPPI (v/n), rLCCPPI & NCI (v/n), PPI_sig (v/n),
| PPI_NCI_sig (v/n), value (v/c), n (v/n), DiseaseClass (v/c), color
| (v/c), frame.color (v/l), size (v/n), graph (e/c), Jaccard.Index
| (e/n), Sab (e/n), Sab_p (e/n), Sab_norm (e/n), Jac_p (e/n), co_12
| (e/n), prev_1 (e/n), prev_2 (e/n), total (e/n), RR (e/n), lwr_ci_12
| (e/n), upr_ci_12 (e/n), como_p (e/n), rel_risk_21 (e/n), lwr_ci_21
| (e/n), upr_ci_21 (e/n), p_21 (e/n), percent (e/n), como_padj (e/n),
| Sab_padj (e/n), Jac_padj (e/n), sign_sab_jac_sabn (e/c), sign_sab_p
| (e/c), sign_sab (e/c), sign_jac (e/c), group (e/c), sign_como (e/c),
| sign_sab_jac (e/c), PPI_DIS (e/n), NCIPPI_DIS (e/n), color (e/c),
| width (e/n), weight (e/n)
+ edges from 1477e3e (vertex names):
Cairo::CairoPDF("../figs/Fig_05C.pdf",
width = 18, height = 10)
par(mfrow = c(1,2),
mar = c(0, 0, 5, 0))
ll = g_sabs_alt %>%
layout_with_fr(., weights = abs(E(.)$Sab_norm) *3) %>%
igraph::norm_coords()
V(g_sabs_alt)$label.color = "#686868"
V(g_sabs_alt)$label = ifelse(V(g_sabs_alt)$name %in% c(Interest), V(g_sabs_alt)$name, NA)
g_sabs_alt %>%
delete_edges(., E(.)[E(.)$PPI_DIS == 0])%>%
plot(,
edge.curved = 0.1,
vertex.frame.color = V(.)$color,
vertex.color = ifelse(V(.)$PPI_sig == 0, "white", V(.)$color),
layout = ll,
vertex.size = ifelse(is.na(V(.)$rLCCPPI), 0.5,V(.)$rLCCPPI)*2.5)
title("PPI")
g_sabs_alt %>%
delete_edges(., E(.)[E(.)$PPI_DIS != 0])%>%
plot(,
edge.curved = 0.1,
vertex.frame.color = V(.)$color,
vertex.color = ifelse(V(.)$PPI_NCI_sig == 0, "white", V(.)$color),
layout = ll,
vertex.size = ifelse(is.na(V(.)$`rLCCPPI & NCI`), 0.5,V(.)$`rLCCPPI & NCI`)*2.5)
title("PPI & NCI")dev.off()quartz_off_screen
2
sessionInfo()R version 4.2.0 (2022-04-22)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Big Sur/Monterey 10.16
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] parallel stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] ggforce_0.4.1 knitr_1.44 progress_1.2.2 CoDiNA_1.1.2
[5] igraph_1.5.1 NetSci_1.0.0 poweRlaw_0.70.6 janitor_2.2.0
[9] lubridate_1.9.3 forcats_1.0.0 stringr_1.5.0 dplyr_1.1.3
[13] purrr_1.0.2 readr_2.1.4 tidyr_1.3.0 tibble_3.2.1
[17] tidyverse_2.0.0 data.table_1.14.8 magrittr_2.0.3 Cairo_1.6-1
[21] UpSetR_1.4.0 plotly_4.10.2 scales_1.2.1 ggrepel_0.9.3
[25] showtext_0.9-6 showtextdb_3.0 sysfonts_0.8.8 patchwork_1.1.3
[29] gghalves_0.1.4 ggplot2_3.4.3
loaded via a namespace (and not attached):
[1] httr_1.4.7 jsonlite_1.8.7 viridisLite_0.4.2 splines_4.2.0
[5] yaml_2.3.7 pillar_1.9.0 lattice_0.21-9 glue_1.6.2
[9] digest_0.6.33 polyclip_1.10-6 snakecase_0.11.1 eulerr_7.0.0
[13] colorspace_2.1-0 htmltools_0.5.6.1 Matrix_1.6-1.1 plyr_1.8.9
[17] pkgconfig_2.0.3 tweenr_2.0.2 tzdb_0.4.0 pracma_2.4.2
[21] timechange_0.2.0 mgcv_1.9-0 generics_0.1.3 farver_2.1.1
[25] withr_2.5.1 lazyeval_0.2.2 cli_3.6.1 crayon_1.5.2
[29] evaluate_0.22 fansi_1.0.5 MASS_7.3-60 nlme_3.1-163
[33] RcppZiggurat_0.1.6 tools_4.2.0 prettyunits_1.2.0 hms_1.1.3
[37] lifecycle_1.0.3 munsell_0.5.0 Rfast_2.0.8 compiler_4.2.0
[41] rlang_1.1.1 grid_4.2.0 rstudioapi_0.15.0 htmlwidgets_1.6.2
[45] visNetwork_2.1.2 labeling_0.4.3 rmarkdown_2.25 gtable_0.3.4
[49] reshape2_1.4.4 R6_2.5.1 gridExtra_2.3 fastmap_1.1.1
[53] utf8_1.2.3 stringi_1.7.12 Rcpp_1.0.11 vctrs_0.6.3
[57] tidyselect_1.2.0 xfun_0.40